### Quais Politicas Importam? RBCS

## Conteudo Agenda Legisladores

# Materias Legislativo

library(readr)
library(foreign)
library(broom)
library(tidytext)
library(readr)
library(tokenizers)
library(SnowballC) 
library(tm)
library (topicmodels)
library(ggplot2)
library(systemfit)
library(interplot)
library(coefplot)
library(tidyverse)
library(stargazer)
library(Zelig)
library(MASS)
library(cowplot)
library(jtools)
library(ggpubr)



# Load the bills table: semicolon-separated, read as UTF-8, strings kept as
# character vectors.
mat <- read.csv(
  file = "MateriasCam.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
)

# Print readr's encoding guess for the same file (diagnostic only; the result
# is not stored).
guess_encoding("MateriasCam.csv", n_max = 1000)

# Upper-case the Ementa text column.
mat[["Ementa"]] <- toupper(mat[["Ementa"]])


#### Unsupervised

# Build a tm corpus with one document per element of mat$Ementa.
mc <- Corpus(VectorSource(mat$Ementa))

# Text clean-up: drop digits and punctuation, collapse whitespace, lower-case,
# remove Portuguese stopwords and stem.
mc <- tm_map(mc, removeNumbers)
mc <- tm_map(mc, removePunctuation)
mc <- tm_map(mc, stripWhitespace)
# tolower() is a base function, not a tm transformation: wrap it in
# content_transformer() so tm_map() keeps returning valid corpus documents
# regardless of the corpus class.
mc <- tm_map(mc, content_transformer(tolower))
mc <- tm_map(mc, removeWords, stopwords("portuguese"))
mc <- tm_map(mc, stemDocument, language = "portuguese")

# First document-term matrix, used only to inspect frequent stems below.
dtm <- DocumentTermMatrix(mc)

# Print stems that occur at least 5 times.
findFreqTerms(dtm, 5)

# Remove a hand-picked list of stems (applied after stemming, so the entries
# are stems rather than full words).
mc <- tm_map(mc, removeWords, c("alter","dispor","julh","lei","nº","sobr","agost","mai","dispo",
                                "ement","artig","paragraf","redaca","determin","acrescent","alteraca",
                                "estabelec","incis","legisl","modif","dezembr","disposit","junh","xxi",
                                "capitul","iii","vii","setembr","caput","janeir","leis","fevereir","xii",
                                "outubr","novembr","xlii","emend","fern","fhc","abril","originari","silv",
                                "assunt","explicaca","nºs","nov","revog","outr","providenc","institu",
                                "autoriz","explicaca","art","constituica","decretol","process","complement",
                                "execut","ambit","brasil","brasileir","pod","cri","regulament","inclu",
                                "ministeri","ser","part","exercici","acresc","projet",
                                "especif","ate","arts","med","praz","cf","cent","fix"))

# Rebuild the document-term matrix from the filtered corpus.
dtm <- DocumentTermMatrix(mc)

findFreqTerms(dtm, 5)

# Drop documents that ended up with no terms.
# NOTE(review): apply() coerces the sparse matrix to a dense one first; for a
# large corpus a sparse row sum (e.g. from the slam package) would be cheaper.
rowTotals <- apply(dtm, 1, sum)
dtm <- dtm[rowTotals > 0, ]

dtm


# Gibbs sampling settings (passed to LDA() through its control list below)

nstart <- 5
seed <- list(1, 2, 3, 4, 5) # one seed per start
best <- TRUE
burnin <- 5000
iter <- 10000
thin <- 100


# Number of topics

k <- 7

# Fit the LDA model with Gibbs sampling

ldabr <- LDA(
  dtm,
  k,
  method = "Gibbs",
  control = list(
    nstart = nstart,
    seed = seed,
    best = best,
    burnin = burnin,
    iter = iter,
    thin = thin
  )
)


# Term-topic plot

# Tidy representation of the fitted model (columns used below: topic, term,
# beta).
ldabrtd <- tidy(ldabr)
ldabrtd

terms(ldabr)

topics(ldabr)

# Keep the 10 highest-beta terms of every topic, sorted by topic and then by
# decreasing beta.
top_terms <- ldabrtd %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, desc(beta))
top_terms


# One horizontal bar panel per topic. The constant fill = "black" on the layer
# takes precedence over the fill aesthetic mapped in aes().
top_terms %>%
  mutate(term = reorder(term, beta)) %>%
  ggplot(aes(term, beta, fill = factor(topic))) +
  geom_col(alpha = 0.8, show.legend = FALSE, fill = "black") +
  facet_wrap(~ topic, scales = "free") +
  coord_flip() +
  labs(x = "Termo", y = "Beta") +
  theme_bw()

write_csv(top_terms, "CamTopTerms.csv")


## Labelling topics

# Lookup from the numeric topic id (as text) to its display label.
topic_labels <- c(
  "1" = "1. Lei e Crime",
  "2" = "5. Regulação",
  "3" = "2. Tributação",
  "4" = "7. Local",
  "5" = "3. Trabalho",
  "6" = "4. Social",
  "7" = "6. Serviços"
)

top_terms$Topics <- as.character(top_terms$topic)

# Replace each id by its label; values without an entry are left untouched.
for (topic_id in names(topic_labels)) {
  top_terms$Topics[top_terms$Topics == topic_id] <- topic_labels[[topic_id]]
}


## Term-topic plot with labelled topics

top_terms %>%
  mutate(term = reorder(term, beta)) %>%
  ggplot(aes(term, beta, fill = factor(Topics))) +
  geom_col(alpha = 0.8, show.legend = FALSE, fill = "black") +
  facet_wrap(~ Topics, scales = "free") +
  coord_flip() +
  labs(x = "Termo", y = "Beta") +
  theme_bw()

## Gamma

# Tidy per-document topic proportions (printed for inspection).
lda_gamma <- tidy(ldabr, matrix = "gamma")
lda_gamma

# Most likely topic of every document, as a one-column matrix whose row names
# are the document ids. paste() joins with spaces, so with k = 7 the file is
# "LDAGibbs 7 DocsToTopics.csv".
ldabr.topics <- as.matrix(topics(ldabr))
ldabr.topics
write.csv(ldabr.topics, file = paste("LDAGibbs", k, "DocsToTopics.csv"))


# Full document-by-topic probability matrix.
topicProbabilities <- as.data.frame(ldabr@gamma)
# Carry the document ids over as row names. Empty documents were removed from
# the document-term matrix before fitting, so the default 1..n row names would
# not line up with the rows of the original data.
rownames(topicProbabilities) <- rownames(ldabr.topics)
topicProbabilities
write.csv(topicProbabilities, file = paste("LDAGibbs", k, "TopicProbabilities.csv"))

######

# Merge the topic assignments back into the original data

# Document-to-topic file written earlier; its X column holds the document id
# and V1 the assigned topic.
topics <- read.csv(
  "LDAGibbs 7 DocsToTopics.csv",
  header = TRUE,
  sep = ",",
  fileEncoding = "UTF-8",
  stringsAsFactors = FALSE
)


# Re-read the original table and number its rows so they can be matched on X.
matem <- read.csv(
  "MateriasCam.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
)
matem$X <- seq_len(nrow(matem))


# Left join: rows without a topic assignment are kept with NA.
matltopics <- merge(matem, topics, by = "X", all.x = TRUE)

# Lookup from the numeric topic id (as text) to its display label.
topic_labels <- c(
  "1" = "1. Lei e Crime",
  "2" = "5. Regulação",
  "3" = "2. Tributação",
  "4" = "7. Local",
  "5" = "3. Trabalho",
  "6" = "4. Social",
  "7" = "6. Serviços"
)

matltopics$Topics <- as.character(matltopics$V1)

# Replace each id by its label; NA and unknown values are left untouched.
for (topic_id in names(topic_labels)) {
  matltopics$Topics[matltopics$Topics == topic_id] <- topic_labels[[topic_id]]
}


save(matltopics, file = "MateriasTopicos.Rda")
write_csv(matltopics, "MateriasTopicos.csv")


## Validation sample

# Fix the RNG state so the same 380 rows are drawn on every run; without it
# AmostraVal.csv is overwritten with a different sample each time.
set.seed(1234)
sampmat <- sample_n(matltopics, 380)
# Keep only the columns written to the validation file.
sampmat <- subset(sampmat, select = c(cod_id, Proposicao, Ementa, Topics))
write_csv(sampmat, "AmostraVal.csv")


############

## Variable organisation and creation

matltopics <- read.csv("MateriasTopicos.csv", header = TRUE, sep = ",",
                       stringsAsFactors = FALSE, fileEncoding = "UTF-8")


# Constant 1 on every row; summed in later aggregations to count rows.
matltopics$V5 <- 1

matltopics$Legislatura <- as.factor(matltopics$Governo)

matltopics$Tema2 <- as.factor(matltopics$Tema)

levels(matltopics$Tema2)

# Topic2: the LDA label, overridden for rows whose Tema is
# "Homenagens e Datas Comemorativas".
matltopics$Topic2 <- matltopics$Topics

# %in% never returns NA, so rows with a missing Tema do not break the
# subscripted assignment (a logical index built with == would contain NA and
# make the data-frame assignment fail).
matltopics$Topic2[matltopics$Tema2 %in% "Homenagens e Datas Comemorativas"] <-
  "8. Homenagens"

# Topic3: the LDA label, overridden for rows whose Ementa contains
# "DIA NACIONAL" (matched on the upper-cased text).
matltopics$Topic3 <- matltopics$Topics
matltopics$Ementa <- toupper(matltopics$Ementa)

matltopics$DataComemorativa <- grepl("DIA NACIONAL", matltopics$Ementa)
# DataComemorativa is already logical; index with it directly instead of
# comparing it with the string "TRUE".
matltopics$Topic3[matltopics$DataComemorativa] <- "8. Data Comemorativa"

matltopics$Topic3 <- as.factor(matltopics$Topic3)


levels(matltopics$Topic3)

table(matltopics$DataComemorativa)



# Keep the party name as read from the file, then upper-case the working copy.
matltopics$Partido2 <- matltopics$Partido


matltopics$Partido <- toupper(matltopics$Partido)

# Strip leading and trailing whitespace from every element of x.
trim <- function(x) {
  without_leading <- sub("^\\s+", "", x)
  sub("\\s+$", "", without_leading)
}

matltopics$Partido <- trim(matltopics$Partido)

# Coalition table; its party names get the same normalisation (upper-case,
# trimmed) so that they match on the merge key.
coal <- read.csv("Coalizao.csv", header = TRUE, sep = ";",
                 stringsAsFactors = FALSE, fileEncoding = "UTF-8")

coal$Partido <- toupper(coal$Partido)
coal$Partido <- trim(coal$Partido)

# Year key used for the merge.
matltopics$Ano <- matltopics$AnoInicio


# NOTE(review): all = TRUE is a full outer join, so (Ano, Partido) pairs that
# exist only in coal are added as rows with NA in every matltopics column.
# The aggregations on V5 later in this file drop them, but confirm that this
# is intended (all.x = TRUE would keep only the rows of matltopics).
matltopics2 <- merge(matltopics, coal, by = c("Ano", "Partido"), all = TRUE)

# library() stops with an error when the package is missing, whereas require()
# only returns FALSE and lets the script fail later.
library(dplyr)
# Rows without a match in the coalition table are coded 0.
matltopics2 <- mutate(matltopics2, Coalizao = ifelse(is.na(Coalizao), 0, Coalizao))


eficmat2 <- matltopics2


# Harmonised party label: PFL is recoded to DEM and SD to SDD.
eficmat2$PartidoRev <- eficmat2$Partido

# %in% never returns NA, so a missing Partido cannot break these subscripted
# assignments.
eficmat2$PartidoRev[eficmat2$Partido %in% "PFL"] <- "DEM"

eficmat2$PartidoRev[eficmat2$Partido %in% "SD"] <- "SDD"


# Thematic salience


## Figure 2: Thematic emphases of the legislative agenda

# Row count per topic (sum of the constant V5) and its share of the total.
t1 <- aggregate(V5 ~ Topics, data = eficmat2, FUN = sum)
t1$Perc <- t1$V5 / sum(t1$V5) * 100

# Bars ordered by decreasing share, each labelled with its percentage.
ggplot(t1, aes(x = reorder(Topics, -Perc), y = Perc)) +
  geom_col() +
  geom_text(
    aes(label = sprintf("%0.2f", round(Perc, digits = 2))),
    vjust = -0.3,
    color = "black",
    size = 2.0
  ) +
  labs(x = "", y = "%") +
  theme_bw() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank()
  )


## Figure 3: Temporal dynamics of the thematic emphases

# Row counts per topic and year (t1) and per year (t2); Perc is the topic's
# share of that year's total.
t1 <- aggregate(V5 ~ Topics + Ano, eficmat2, sum)
t2 <- aggregate(V5 ~ Ano, eficmat2, sum)
t3 <- merge(t1, t2, by = c("Ano"), all.x = TRUE)
t3$Perc <- ((t3$V5.x / t3$V5.y) * 100)


# theme_bw() is a complete theme: it must come BEFORE the theme() tweaks,
# otherwise it resets them (the rotated axis text and the legend setting were
# being discarded).
ggplot(t3, aes(x = Ano, y = Perc)) +
  geom_line() +
  geom_vline(xintercept = c(1998, 2002, 2006, 2010), colour = "red1", linetype = 2) +
  facet_wrap(~ Topics) +
  labs(x = "", y = "%") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")


# Average yearly total (mean of the per-year V5 sums).
mean(t2$V5)

## Figure 4: Thematic salience and political parties in Brazil (1995-2014)

# Row counts per topic and party (t1) and per party (t2).
t1 <- aggregate(V5 ~ Topics + PartidoRev, data = eficmat2, FUN = sum)
t2 <- aggregate(V5 ~ PartidoRev, data = eficmat2, FUN = sum)
# Keep 20 party rows of t2, selected by row position.
# NOTE(review): the positions depend on the row order produced by aggregate();
# confirm they still pick the intended parties whenever the data change.
t2 <- t2[c(11,34,2,28,7,25,14,35,10,15,18,17,6,27,39,26,19,12,31,30), ]
# Left merge: parties not kept in t2 get NA totals and are removed by na.omit().
t3 <- merge(t1, t2, by = "PartidoRev", all.x = TRUE)
t3$Perc <- t3$V5.x / t3$V5.y * 100
t3 <- na.omit(t3)

# One panel per party, bars showing each topic's share within the party.
ggplot(t3, aes(x = Topics, y = Perc)) +
  geom_col() +
  labs(x = "", y = "%") +
  theme_bw() +
  facet_wrap(~ PartidoRev) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90)
  )


## Figure 5: Thematic emphases - coalition vs. opposition

# Row counts per topic and coalition status (t1) and per status (t2); Perc is
# the topic's share within each status.
t1 <- aggregate(V5 ~ Topics + Coalizao, data = eficmat2, FUN = sum)
t2 <- aggregate(V5 ~ Coalizao, data = eficmat2, FUN = sum)
t3 <- merge(t1, t2, by = "Coalizao", all.x = TRUE)
t3$Perc <- t3$V5.x / t3$V5.y * 100

# Facet titles for the two Coalizao codes.
coalizao <- c("0" = "Oposição", "1" = "Coalizão")

ggplot(t3, aes(x = Topics, y = Perc)) +
  geom_col() +
  labs(x = "", y = "%") +
  theme_bw() +
  facet_wrap(Coalizao ~ ., labeller = as_labeller(coalizao)) +
  geom_text(
    aes(label = sprintf("%0.1f", round(Perc, digits = 1))),
    vjust = -0.3,
    color = "black",
    size = 2.0
  ) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90)
  )




## Figure 6: Thematic salience and electoral district

# Row counts per topic and UF (t1) and per UF (t2); Perc is the topic's share
# within each UF.
t1 <- aggregate(V5 ~ Topics + UF, data = eficmat2, FUN = sum)
t2 <- aggregate(V5 ~ UF, data = eficmat2, FUN = sum)
t3 <- merge(t1, t2, by = "UF", all.x = TRUE)
t3$Perc <- t3$V5.x / t3$V5.y * 100
# Drop the first seven rows of the merged table.
# NOTE(review): presumably these are the rows of one unwanted UF value that
# sorts first (e.g. a blank) - confirm, since the selection is positional.
t3 <- t3[-(1:7), ]

# One panel per UF, bars showing each topic's share within the UF.
ggplot(t3, aes(x = Topics, y = Perc)) +
  geom_col() +
  labs(x = "", y = "%") +
  theme_bw() +
  facet_wrap(~ UF) +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90)
  )



